// Merge in disambiguated inventor data.
// Only the four application-level variables needed below are read from disk.
// The 1:m merge allows several rows of the inventor file per application;
// observations that do not match on both sides are discarded and no _merge
// indicator is left behind.
use application_number filing_year filing_date patent_issued ///
	using "${filedata}transactions_application_2023.dta", clear
merge 1:m application_number using "${filedata}disambig_formerge.dta", ///
	keep(match) nogenerate

//construct inventor-level vars
// Input : one row per (application, inventor record) with filing_date,
//         filing_year, patent_issued and the disambiguated inventor id.
// Output: one row per application_number with
//           min_years      - smallest ysfa among the rows of the application
//                            (rows filed on the inventor's first filing date
//                            have ysfa missing and are ignored by min)
//           num_applicants - number of rows for the application
//           prev_apps      - 1 if any row has an earlier application, else 0
//           prev_pats      - 1 if any row has an earlier issued application
rename inventor_disambiguated_disambig disambig_ID
drop if disambig_ID==.
drop if filing_year== .

// Fix a reproducible within-inventor order. Sorting on filing_date alone
// leaves same-day applications in arbitrary order, which made the running
// sums below differ from run to run.
bysort disambig_ID (filing_date application_number): gen long app_seq=_n

// prev_app = number of the inventor's applications filed on a strictly
// earlier date. Every application sharing a filing date gets the value of
// the first one in that date group, so ties no longer receive fractional
// (average) ranks that made a first-day application look like it had history.
// Left missing when filing_date is missing, as a rank would be.
bysort disambig_ID filing_date (app_seq): gen double prev_app=app_seq[1]-1 if !missing(filing_date)

// prev_pat = running total of patent_issued over the inventor's earlier
// rows (sum() treats the nonexistent row before the first one as 0) ...
bysort disambig_ID (app_seq): gen double prev_pat=sum(patent_issued[_n-1])
// ... then same-day applications are all set to the total accumulated
// before that date, so they do not count each other as "previous".
bysort disambig_ID filing_date (app_seq): replace prev_pat=prev_pat[1]

// Cap both counts at their 99th percentile (no trimming at the bottom).
winsor2 prev_app prev_pat,replace cuts(0 99)

// Years since the inventor's first filing date, in whole 365-day units.
gegen first_app_date=min(filing_date), by(disambig_ID)
gen days_since_firstapp= filing_date-first_app_date
gen ysfa=floor(days_since_firstapp/365)

// Applications filed on the first filing date carry no elapsed time.
replace ysfa=. if filing_date==first_app_date

// Collapse inventor-level measures to the application level.
gegen max_app=max(prev_app),by(application_number)
gegen max_pat=max(prev_pat),by(application_number)
gegen min_years=min(ysfa),by(application_number)

//does anyone on the app ever apply before?
// Stata orders missing above every number, so an unguarded (max_app > 0)
// would code unknown history as 1; leave the flag missing instead.
gen prev_apps=(max_app > 0) if !missing(max_app)
//does anyone on the app ever patent before?
gen prev_pats=(max_pat > 0) if !missing(max_pat)

gegen num_applicants=count(1), by(application_number)
keep application_number min_years num_applicants prev_apps prev_pats
// All kept variables are constant within application_number, so this leaves
// one row per application.
duplicates drop

// Now merge the application-level inventor controls back onto the
// transaction-application data. Rows found only in the in-memory (master)
// data are discarded -- none are expected -- and the match indicator is
// stored as inventor_disambiguated rather than _merge.
merge 1:1 application_number using "${filedata}transactions_application_2023.dta", ///
	keep(match using) generate(inventor_disambiguated)

// generate quality controls

// appl_id is a copy of application_number passed through destring so it can
// serve as the key into the publication-statistics file; it is dropped again
// once the merge is done. Rows found only in the statistics file are discarded.
// NOTE(review): 1:m lets several statistics rows match one application, which
// would repeat that application's row -- confirm this is intended.
gen appl_id=application_number
destring appl_id,replace
merge 1:m appl_id using "${rawdata}pgpub_document_stats", keep(master match) nogenerate
drop appl_id

// Cap each of these two controls at its own 99th percentile.
foreach ctl in pub_clm_ct pub_wrd_min {
	sum `ctl',de
	replace `ctl'=`r(p99)' if `ctl'>`r(p99)' & `ctl'!=.
}

**# All controls (missing)

// For every control: create the dummy m_<control>, equal to 1 where the
// control is missing, and then set the missing values of the control to 0.
local controls pub_clm_ct pub_wrd_ct pub_wrd_min pub_dep_clm_ct pub_dep_wrd_ct pub_dep_wrd_min pub_wrd_avg pub_dep_wrd_avg min_years prev_apps prev_pats
foreach ctl of local controls {
	gen m_`ctl'=(`ctl'==.)
	replace `ctl'=0 if `ctl'==.
}

// Write out the application-level file with all controls attached.
save "${filedata}transactions_application_controls_2023.dta", replace
